import os
import sys


# Command line: one positional argument naming the alignment target.
# "genome" selects the hg38 assembly files in the current directory; any
# other value is used as the base name of a filter sequence set stored in
# the Filters directory below.
if len(sys.argv) < 2:
    # Exit with a usage message instead of an IndexError traceback.
    sys.exit(
        "usage: %s TARGET  (\"genome\" or the name of a filter sequence set)"
        % (sys.argv[0] if sys.argv else "script")
    )
target = sys.argv[1]

# Memory requested for each generated job (written to the --mem directive);
# the value is the same for every target.
memory = "16G"

# `flag` is passed to "samtools view -F" by the job scripts generated later
# in this file, i.e. alignments with any of these SAM flag bits are dropped.
if target == "genome":
    assembly = "hg38"
    fasta = "%s.fa" % assembly
    chrom_sizes = "%s.chrom.sizes" % assembly
    twobit = "%s.2bit" % assembly
    flag = 4  # unmapped
else:
    fasta = "/osc-fs_home/mdehoon/Data/CASPARs/Filters/%s.fa" % target
    chrom_sizes = "/osc-fs_home/mdehoon/Data/CASPARs/Filters/%s.chrom.sizes" % target
    twobit = "%s.2bit" % target
    flag = 20  # unmapped, or mapped to the opposite strand


# Collect the input chunks named seqlist_<number>.fa from the current
# directory. Files are skipped when they do not end in .fa or when their
# base name does not contain exactly one underscore (e.g. hg38.fa).
# Results: `filenames` is the list of chunk file names ordered by number,
# and `n` is the number of chunks, which must be numbered 0 .. n-1.
filenames = []
for filename in os.listdir("."):
    basename, extension = os.path.splitext(filename)
    if extension != ".fa":
        continue
    try:
        seqlist, number = basename.split("_")
    except ValueError:
        continue
    # Explicit checks rather than assert statements: asserts are removed
    # under "python -O", which would let malformed input pass unnoticed.
    if seqlist != "seqlist":
        sys.exit(
            "unexpected FASTA file %r: expected a name of the form "
            "seqlist_<number>.fa" % filename
        )
    try:
        number = int(number)
    except ValueError:
        sys.exit("cannot parse the chunk number in file name %r" % filename)
    filenames.append((number, filename))

# Sort numerically by chunk number (seqlist_10.fa comes after seqlist_9.fa).
filenames.sort()

# The job scripts address chunks as seqlist_0.fa .. seqlist_<n-1>.fa, so the
# numbering must be contiguous, start at zero, and contain no duplicates.
for expected, (number, filename) in enumerate(filenames):
    if number != expected:
        sys.exit(
            "chunk numbering is not contiguous: expected number %d but "
            "found %r" % (expected, filename)
        )
n = len(filenames)

filenames = [filename for number, filename in filenames]


# Write one batch script per input chunk, named script_<target>_<number>.sh,
# and record the job names (without the .sh suffix) in `jobs`.
#
# Each script carries #SBATCH directives for its stderr/stdout files and the
# memory request, followed by a single pipeline:
#   bwa mem (chunk against `fasta`) | samtools view -F `flag` | bamToPsl
#   | pslCheck (messages go to <target>.<number>.out) | sort on field 14
#   | pslRecalcMatch (using `twobit` and the chunk) | sort on field 10
# whose final output is written to <target>.<number>.psl.
jobs = []
for number in range(n):
    job = "script_%s_%d" % (target, number)
    jobs.append(job)
    script_name = "%s.sh" % job
    # The context manager closes the script file even if a write fails.
    with open(script_name, 'w') as handle:
        handle.write("""\
#!/bin/bash
#SBATCH -e ./%s.stderr
#SBATCH -o ./%s.stdout
#SBATCH --mem=%s
""" % (job, job, memory))
        handle.write("""\
bwa mem -O 0 -E 1 -A 1 -B 1 -T 10 -k 10 -c 100000000 -a -Y %s seqlist_%d.fa | samtools view -F %d -u | bamToPsl - stdout | pslCheck stdin -pass=stdout -quiet 2> %s.%d.out | sort -k 14 | pslRecalcMatch stdin %s seqlist_%d.fa stdout | sort -k 10 > %s.%d.psl
""" % (fasta, number, flag, target, number, twobit, number, target, number))


# Write script.sh, a driver containing one "sbatch <job>.sh" line per
# generated job script, then report how many job scripts were produced.
with open("script.sh", 'w') as output:
    for job in jobs:
        output.write('sbatch %s.sh\n' % job)
# The jobs are submitted with sbatch and configured through #SBATCH
# directives, so the scheduler named in the message is Slurm.
print("%d scripts generated; run script.sh to schedule them on Slurm" % len(jobs))
